#This script generates synthetic articles for robustness checks across different languages
#It uses GPT-3.5-turbo to generate the articles
#It generates 500 articles for each language and each type of criticism
#It produces a csv file with the articles
#NOTE: This script will not produce exactly the same results as the paper,
#due to the stochastic nature of the language model
import requests
import openai
import time
import csv
import os
from openai import OpenAI


# Initialize the OpenAI client.
# Either paste your key below or, preferably, leave it empty and export the
# OPENAI_API_KEY environment variable so the secret is never committed.
api_key = ''  #Replace with your own API key

# An empty placeholder is turned into None so that the client falls back to the
# OPENAI_API_KEY environment variable and raises at start-up when no key is
# available, instead of sending requests that can only fail authentication.
client = OpenAI(api_key=api_key or None)

# Languages in which the synthetic articles are written.
languages = ["English", "French", "Spanish", "Russian", "Mandarin", "Arabic", "Japanese", "Korean"]

# Destination of the generated articles and the directory that holds it.
csv_file_path = "data/synthetic/synthetic_articles_all_gpt-3.5-turbo.csv"
csv_dir = os.path.dirname(csv_file_path)

# Create the output directory if needed. exist_ok avoids the check-then-create
# race, and the guard skips os.makedirs('') (which raises) when the path has no
# directory component.
if csv_dir:
    os.makedirs(csv_dir, exist_ok=True)

# Write the header when the file is missing *or* empty; an existing empty file
# would otherwise receive data rows without any header.
if not os.path.exists(csv_file_path) or os.path.getsize(csv_file_path) == 0:
    with open(csv_file_path, 'w', encoding='utf-8', newline='') as csvfile:
        fieldnames = ['Language', 'Criticism', 'Index', 'Article']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

# Seconds to wait before retrying after a transient API failure.
RETRY_DELAY_SECONDS = 60

# API errors that repeating the identical request cannot fix (missing or
# invalid key, malformed request, unknown model, ...). These are re-raised
# instead of being retried forever.
NON_RETRYABLE_ERRORS = (
    openai.AuthenticationError,
    openai.PermissionDeniedError,
    openai.BadRequestError,
    openai.NotFoundError,
    openai.UnprocessableEntityError,
)


def _count_existing_rows(path):
    """Return the number of data rows (header excluded) already stored in *path*.

    The file is parsed with the csv module rather than counted line by line
    because an article may contain newlines inside a single quoted field.
    Returns 0 when the file does not exist or holds nothing but the header.
    """
    if not os.path.exists(path):
        return 0
    with open(path, 'r', encoding='utf-8', newline='') as existing:
        total_rows = sum(1 for _ in csv.reader(existing))
    return max(total_rows - 1, 0)


def _generate_article(messages):
    """Request one article from the chat model, retrying until it succeeds.

    Args:
        messages: Chat messages (system + user prompt) sent to the model.

    Returns:
        The stripped text of the first completion choice.

    Raises:
        openai.APIError: One of NON_RETRYABLE_ERRORS, for which a retry of the
            same request cannot succeed. Every other failure is retried
            indefinitely, as in the original best-effort loop.
    """
    while True:
        try:
            # NOTE(review): max_tokens=500 caps the completion at 500 tokens,
            # which is usually fewer than the 500 words requested in the
            # prompt, so articles may be cut off. Kept unchanged - confirm
            # this matches the paper's setup.
            response = client.chat.completions.create(model="gpt-3.5-turbo",
                                                      messages=messages,
                                                      max_tokens=500,
                                                      temperature=0.7)
            content = response.choices[0].message.content
            if content is None:
                # The API may return no text content; wait and ask again
                # rather than failing on None.strip().
                print(f"Empty response, sleeping for {RETRY_DELAY_SECONDS} seconds...")
                time.sleep(RETRY_DELAY_SECONDS)
                continue
            return content.strip()
        except openai.RateLimitError:
            print(f"Rate limit hit, sleeping for {RETRY_DELAY_SECONDS} seconds...")
            time.sleep(RETRY_DELAY_SECONDS)
        except openai.APITimeoutError:
            # Must precede APIConnectionError (its parent class). The timeout
            # itself already acted as a delay, so retry straight away.
            print("Read timeout, retrying...")
        except openai.APIConnectionError as e:
            print(f"Connection error {e}, sleeping for {RETRY_DELAY_SECONDS} seconds...")
            time.sleep(RETRY_DELAY_SECONDS)
        except NON_RETRYABLE_ERRORS:
            # Retrying would only hammer the API with a request that cannot
            # succeed; surface the problem to the user instead.
            raise
        except openai.APIError as e:
            # Remaining API errors (e.g. 5xx / 502 Bad Gateway) are treated as
            # transient. Always back off so the loop never spins without delay.
            print(f"API error: {e}, sleeping for {RETRY_DELAY_SECONDS} seconds...")
            time.sleep(RETRY_DELAY_SECONDS)
        except Exception as e:
            print(f"Unexpected error: {e}, sleeping for {RETRY_DELAY_SECONDS} seconds...")
            time.sleep(RETRY_DELAY_SECONDS)


# Rows already on disk from an earlier (interrupted) run. They are skipped below
# so that a restart resumes where it stopped instead of regenerating articles
# and duplicating Index values. This assumes the existing rows were produced by
# this same generation order.
already_written = _count_existing_rows(csv_file_path)

# Now, open the file in append mode for adding new data
with open(csv_file_path, 'a', encoding='utf-8', newline='') as csvfile:
    fieldnames = ['Language', 'Criticism', 'Index', 'Article']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    global_index = 0  # 1-based running index over all generated articles

    for lang in languages:
        for crit_type in ['Critical', 'Not Critical']:
            for _ in range(500):  # Assuming 500 articles per type
                global_index += 1
                if global_index <= already_written:
                    continue  # Produced by a previous run

                messages = [
                    {"role": "system", "content": "You are a political journalist."},
                    {"role": "user", "content": f"Write a 500-word political news article in {lang} that is {'' if crit_type == 'Critical' else 'not '}critical of the leader POLITFIG. The article does not require a title. Do not translate the word POLITFIG."},
                ]
                message_content = _generate_article(messages)

                # Writing happens outside the retry loop so that a failed write
                # is not mistaken for an API failure and retried with a new
                # (paid) request.
                writer.writerow({'Language': lang, 'Criticism': crit_type, 'Index': global_index, 'Article': message_content})
                # Flush per row: every article costs an API call, so it should
                # not be lost from the buffer if the process is killed.
                csvfile.flush()
